1. PPS Data Preparation

Author

Colin J Lee

Clean up codebook

Code
pp_codebook <- pp_codebook %>%
  row_to_names(row_number = 1)

#remove all rows not related to traits, emotions, and DIAMONDS
#and remove sad and anxiety emotion, redundant with neuroticism
pp_codebook <- pp_codebook[-c(61:63,72:74, 76, 80, 89:186),]

Clean pp so it only has relevant variables

Code
pp_wide <- pp_wide %>%
  select(SID:EM10, N01:N12, O01:O12) %>%
  arrange(SID, Date) %>%
  select(-StartDate)
Code
### how many participants initially in dataset?
pp_wide %>%
  group_by(SID) %>%
  distinct(SID) %>%
  nrow() #172
[1] 172
Code
# Min 50 obs
pp_wide <- pp_wide %>%
  group_by(SID) %>%
  filter(n() >= 50) %>% 
  arrange(SID, Date) %>%
  mutate(all_beeps = seq(1, n(), 1)) %>%
  ungroup()

length(unique(pp_wide$SID)) #96
[1] 96
Code
survey_count <- pp_wide %>% group_by(SID) %>% count() %>% ungroup() %>% arrange(n)

Make it long and merge reverse code from codebook and reverse code

Code
pp_long_updated <- pp_wide %>%
  pivot_longer(
    cols = -c(SID:HourBlock1, all_beeps),
    names_to = "trait",
    values_to = "value"
  ) %>%
  rename("New #" = "trait") #for merging codebook; we'll change it back

pp_long_updated <- pp_long_updated %>%
  left_join(
    pp_codebook %>%
      select('New #', Reverse, Facet, Inventory, Trait, 'Modified Item')
  )
Joining with `by = join_by(`New #`)`
Code
pp_long_updated <- pp_long_updated %>%
  mutate(value = ifelse(is.na(Reverse), value, 
         as.numeric(6 - value))) %>% #6 bc only the trait items are reverse coded (max-min - value)
  select(-Reverse) %>%
  rename("trait" = "New #") 

Create facets

Code
#first, subset traits
pp_long_traits <- pp_long_updated %>%
  filter(Inventory == "BFI-2") %>%
  group_by(SID, Date, Facet, Day, Hour, HourBlock, HourBlock1, all_beeps) %>%
  mutate(facet_value = mean(value, na.rm =TRUE)) %>% #mean score on facet
  distinct_at(vars(SID, Date, Facet, facet_value)) %>% #keep unique rows
  mutate_all(~ifelse(is.nan(.), NA, .)) %>%
  arrange(SID, Date) %>%
  ungroup()
`mutate_all()` ignored the following grouping variables:
• Columns `SID`, `Date`, `Facet`, `Day`, `Hour`, ...
ℹ Use `mutate_at(df, vars(-group_cols()), myoperation)` to silence the message.
Code
#subset emotion and diamonds
pp_long_emo <- pp_long_updated %>%
  filter(Inventory == "Affect") %>%
  select(c(SID:all_beeps, value, 'Modified Item')) %>%
  rename("Facet" = 'Modified Item')

pp_long_sit <- pp_long_updated %>%
  filter(Inventory == "S8-I") %>%
  select(c(SID:all_beeps, value, Trait)) %>%
  rename("Facet" = 'Trait')


#join all three dfs
pp_long_comp <- full_join(pp_long_traits, pp_long_emo) 
Joining with `by = join_by(Day, Hour, HourBlock, HourBlock1, all_beeps, SID,
Date, Facet)`
Code
pp_long_comp <- full_join(pp_long_comp, pp_long_sit) 
Joining with `by = join_by(Day, Hour, HourBlock, HourBlock1, all_beeps, SID,
Date, Facet, value)`
Code
#1 column for values
pp_long_comp$val <- coalesce(pp_long_comp$value, pp_long_comp$facet_value)

#order columns
pp_long <- pp_long_comp[,c(6, 7, 2:5, 8:11)] %>%
  arrange(SID, Date)

Make it wide again

Code
pp_wide <- pp_long %>%
 pivot_wider(
   id_cols = (SID:all_beeps),
   names_from = "Facet",
   values_from = "val"
    )

gdata::keep(pp_wide, sure = TRUE)
wd <- getwd()
setwd(wd)

describe(pp_wide %>% select(-Date))

How much missingness of facets before imputation?

Code
pp_miss <- pp_wide %>% select(Compassion:"Creative Imagination")
sum(is.na(pp_miss))/(8010*15)
[1] 0.2168373
Code
#21.7%

IMPUTATION TIME BABY

Code
pp_mi <- data.frame(unclass(pp_wide %>% select(-Date, -Hour, -HourBlock, -HourBlock1)))

pp_mi <- amelia(pp_mi, m = 1, ts = "all_beeps", cs = "SID")$imputations[[1]] %>%
  as_tibble() %>%
  full_join(pp_wide %>% select(SID, Date, Hour, HourBlock, HourBlock1, all_beeps)) %>%
  select(-all_beeps); pp_mi
Warning in amcheck(x = x, m = m, idvars = numopts$idvars, priors = priors, : The variable Deception is perfectly collinear with another variable in the data.
Warning: There are observations in the data that are completely missing. 
         These observations will remain unimputed in the final datasets. 
-- Imputation 1 --

  1  2  3  4  5  6  7  8  9 10 11
Joining with `by = join_by(SID, all_beeps)`
Code
#delete HourBlock column, will be created in next script.
pp_mi <- pp_mi %>% select(-HourBlock)
#remove two NA rows
pp_mi <- pp_mi[complete.cases(pp_mi), ]

# check ranges after imputation
psych::describe(pp_mi %>% select(-Date)) # min and max values outside of 1-5 range
Code
# restrict range of state values after MI
pp <- pp_mi[,2:16]
pp[ pp > 5 ] <- 5
pp[ pp < 1] <- 1

sits <- pp_mi[,27:34]
sits[ sits > 3 ] <- 3
sits[ sits < 1] <- 1

#recombine
pp_mi_range <- cbind(pp_mi$SID, pp_mi$Date, pp_mi$Hour, pp_mi$HourBlock1, pp)
pp_mi_range_sit <- cbind(pp_mi$SID, pp_mi$Date, pp, sits)
#order columns
colnames(pp_mi_range_sit)
 [1] "pp_mi$SID"              "pp_mi$Date"             "Compassion"            
 [4] "Respectfulness"         "Trust"                  "Organization"          
 [7] "Productiveness"         "Responsibility"         "Sociability"           
[10] "Assertiveness"          "Energy.Level"           "Anxiety"               
[13] "Depression"             "Emotional.Volatility"   "Intellectual.Curiosity"
[16] "Aesthetic.Sensitivity"  "Creative.Imagination"   "Duty"                  
[19] "Intellect"              "Adversity"              "Mating"                
[22] "pOsitivity"             "Negativity"             "Deception"             
[25] "Sociality"             
Code
pp_mi_range <- pp_mi_range[,c(1:4, 11, 13, 12, 7, 6, 5, 9, 10, 8, 15, 14, 16, 17, 18, 19)]
pp_mi_range_sit <- pp_mi_range_sit[,c(1, 2, 9, 11, 10, 5, 4, 3, 7, 8, 6, 13, 12, 14, 15, 16, 17, 18:25)]
Code
colnames(pp_mi_range) <- c("SID", "Date", "Hour", "HourBlock1", "Sociability", "EnergyLevel", "Assertive", "Trust", "Respect", "Compassion", "Productivity", "Responsibility", "Organization", "Depression", "Anxiety", "EmotionalVol", "IntCuriosity", "AesthSense", "CrtvImagination")

sit_names <- colnames(pp_mi_range_sit[18:25])


colnames(pp_mi_range_sit) <- c("SID", "Date", "Sociability", "EnergyLevel", "Assertive", "Trust", "Respect", "Compassion", "Productivity", "Responsibility", "Organization", "Depression", "Anxiety", "EmotionalVol", "IntCuriosity", "AesthSense", "CrtvImagination", sit_names)



pp_wide <- pp_mi_range
pp_wide_sit <- pp_mi_range_sit
rm(pp, pp_mi, pp_mi_range)

describe(pp_wide)
Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
Code
describe(pp_wide_sit)
Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
Code
pomp <- function(x, na){
  (x - min(x, na.rm = na))/(max(x, na.rm = na) - min(x, na.rm = na))*100
} 

pp_wide_POMP <- pp_wide %>%
  mutate(
    Sociability = pomp(Sociability, TRUE)
    , EnergyLevel =  pomp(EnergyLevel, TRUE)
    , Assertive =  pomp(Assertive, TRUE)
    , Trust =  pomp(Trust, TRUE)
    , Respect =  pomp(Respect, TRUE)
    , Compassion =  pomp(Compassion, TRUE)
    , Productivity =  pomp(Productivity, TRUE)
    , Responsibility =  pomp(Responsibility, TRUE)
    , Organization =  pomp(Organization, TRUE)
    , Depression =  pomp(Depression, TRUE)
    , Anxiety =  pomp(Anxiety, TRUE)
    , EmotionalVol =  pomp(EmotionalVol, TRUE)
    , IntCuriosity =  pomp(IntCuriosity, TRUE)
    , AesthSense =  pomp(AesthSense, TRUE)
    , CrtvImagination =  pomp(CrtvImagination, TRUE)
  ) %>% 
  ungroup()

pp_wide_POMP[,5:19] <- round(pp_wide_POMP[,5:19], 2)
#describe(pp_wide) #cool

pp_wide <- pp_wide_POMP

POMP situations df

Code
pp_wide_sit <- pp_wide_sit %>%
  mutate(
    Sociability = pomp(Sociability, TRUE)
    , EnergyLevel =  pomp(EnergyLevel, TRUE)
    , Assertive =  pomp(Assertive, TRUE)
    , Trust =  pomp(Trust, TRUE)
    , Respect =  pomp(Respect, TRUE)
    , Compassion =  pomp(Compassion, TRUE)
    , Productivity =  pomp(Productivity, TRUE)
    , Responsibility =  pomp(Responsibility, TRUE)
    , Organization =  pomp(Organization, TRUE)
    , Depression =  pomp(Depression, TRUE)
    , Anxiety =  pomp(Anxiety, TRUE)
    , EmotionalVol =  pomp(EmotionalVol, TRUE)
    , IntCuriosity =  pomp(IntCuriosity, TRUE)
    , AesthSense =  pomp(AesthSense, TRUE)
    , CrtvImagination =  pomp(CrtvImagination, TRUE)
  ) %>% 
  ungroup()

pp_wide_sit[,3:17] <- round(pp_wide_sit[,3:17], 2)
Code
describe(pp_wide_sit)
Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
Code
# No NAs/NANs/Infs
sum(is.na(pp_wide)) #0
[1] 0
Code
sum(apply(pp_wide,2,is.nan)) #0
[1] 0
Code
sum(apply(pp_wide,2,is.infinite)) #0
[1] 0
Code
sum(pp_wide < 0) #0
[1] 0

LOOK AT VARIABLE VARIANCES

Descriptives

Code
#make it long
pp_long <- pp_wide %>%
  pivot_longer(
    cols = Sociability:CrtvImagination
    , names_to = c("facet")
    , values_to = "value"
  ) %>%
  arrange(SID, Date)

# function for mean, sd, median, min, max, n, n missing
descriptive_fun <- function(df, var) {
  df %>%
  summarize(
      mean = mean({{ var }},   na.rm = TRUE),
      sd     = sd({{ var }},     na.rm = TRUE),
      median = median({{ var }}, na.rm = TRUE),
      min    = min({{ var }},    na.rm = TRUE),
      max    = max({{ var }},    na.rm = TRUE),
      mode = DescTools::Mode({{ var }},    na.rm = TRUE),
      # omega  = omega({{var}}),
      # alpha  = alpha({{var}}),
      n      = n(),
      .groups = "drop"
      )
}

pp_descriptives <- pp_long %>%
  group_by(SID, facet) %>% # we want descriptive for each trait for each participant
  descriptive_fun(var = value) %>% 
  ungroup()
Registered S3 method overwritten by 'DescTools':
  method         from 
  reorder.factor gdata

Look at no variance participant-variables

Code
pp_no_var <- pp_descriptives %>%
  filter(sd == 0)  #fuck; 5 variables
pp_no_var
Code
pp_low_var <- pp_descriptives %>%
  filter(sd < 5)
pp_low_var
Code
#cut em
'%nin%' <- function(x,y)!('%in%'(x,y))
pp_wide <- pp_wide %>%
  filter(SID %nin% pp_no_var$SID)

pp_wide_sit <- pp_wide_sit %>%
  filter(SID %nin% pp_no_var$SID)

Look at low variance participant-variables

Code
pp_descriptives %>%
  filter(sd < 10) 

Look at median = 0 or 100

Code
pp_descriptives %>%
  filter(median == 0 | median == 100)
Code
# alot of median = 0. or 100 but chill variance

Reverse Score Neuroticism

Code
pp_wide_sit <- pp_wide_sit %>%
  mutate(Depression = 100 - Depression,
         Anxiety = 100 - Anxiety,
         EmotionalVol = 100 - EmotionalVol) %>%
  rename(Depression_r = Depression,
         Anxiety_r = Anxiety,
         EmotionalVol_r = EmotionalVol)
Code
#list
pp_list_pre <- split(pp_wide_sit, f = pp_wide_sit$SID)

#nested
pp_nested_pre_no_profiles <- data.table::rbindlist(pp_list_pre, fill=TRUE) %>%
  group_by(SID) %>%
  nest() %>%
  ungroup() %>%
  arrange(SID)
Code
save(pp_list_pre, file = "Data/pp_list_pre.RData")
save(pp_wide_sit, file = "Data/pp_wide_sit.RData")
save(pp_nested_pre_no_profiles, file = "Data/pp_nested_pre_no_profiles.RData")